import scanpy as sc, anndata as ad, numpy as np, pandas as pd
from scipy import sparse
from anndata import AnnData
import warnings
import socket
import holoviews as hv
import plotly.express as px
import kaleido
from matplotlib import pylab
import sys
import yaml
import os
from pandas.api.types import CategoricalDtype
import plotly
import random
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')
plotly.offline.init_notebook_mode()
%matplotlib inline
import ipynbname
nb_fname = ipynbname.name()
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=50, facecolor='white')
pylab.rcParams['figure.figsize'] = (9, 9)
scanpy==1.8.1 anndata==0.7.6 umap==0.4.6 numpy==1.20.2 scipy==1.6.3 pandas==1.2.4 scikit-learn==0.24.2 statsmodels==0.13.1 python-igraph==0.9.8 louvain==0.7.1 pynndescent==0.5.5
hostRoot = "-".join(socket.gethostname().split('-')[0:2])
outdir="./outdir"
adataPath = outdir+"/3_polaroid_quickAnno.h5ad"
figDir = "./figures"
leiden3Mapping = {"0":"Encs-1",
"1":"CBC/BRCs-1",
"2":"EXN-1",
"3":"EnCs-2",
"4":"RGCs-1",
"5":"RGCs-2",
"6":"ccRGCs",
"7":"inPCs",
"8":"EXN-2",
"9":"RtCs",
"10":"ER-Cs",
"11":"CBC/BRCs-2"}
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] = adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
adata
AnnData object with n_obs × n_vars = 18822 × 28371
obs: 'dataset', 'organoid', 'region', 'type', 'type_region', 'regionContrast', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'is.Stressed', 'leiden0.3', 'leidenAnno'
var: 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'mean', 'std'
uns: 'dataset_colors', 'dendrogram_leidenAnno', 'diffmap_evals', 'draw_graph', 'is.Stressed_colors', 'leiden', 'leiden0.3_colors', 'leidenAnno_colors', 'neighbors', 'organoid_colors', 'pca', 'rank_genes_groups', 'regionContrast_colors', 'region_colors', 'type_colors', 'umap'
obsm: 'X_diffmap', 'X_draw_graph_fa', 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'connectivities', 'distances'
sc.pl.umap(adata, color=["leidenAnno","type","region","regionContrast"], ncols = 2, size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)
# Ordering definition
compositions = pd.DataFrame(adata.obs.groupby(["region","leidenAnno"]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby("region")["number_of_cells"].sum()[compositions.region])
compositions["number_of_cells_normByregionANDleidenAnno"] = np.array(compositions["number_of_cells_normByregion"]) / np.array(compositions.groupby("leidenAnno")["number_of_cells_normByregion"].sum()[compositions["leidenAnno"]])
leidenOrder = compositions[compositions["region"] == "proximal"].sort_values("number_of_cells_normByregionANDleidenAnno", ascending=False)["leidenAnno"].tolist()
leidenOrder
['RGCs-2', 'EXN-2', 'ccRGCs', 'Encs-1', 'RtCs', 'CBC/BRCs-1', 'CBC/BRCs-2', 'RGCs-1', 'EnCs-2', 'inPCs', 'ER-Cs', 'EXN-1']
## Not normalized
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] = adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
adata = adata[adata.obs["type"].isin(["control"])]
variable1="region"
variable2="leidenAnno"
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
fig = px.bar(compositions, x=variable2, y="number_of_cells", color=variable1,
category_orders={variable2:leidenOrder},
height=800,width=1000, template="plotly_white",
color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Not normalized",
)
fig.update_traces(marker_line_color='black',
marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
)
fig.update_xaxes(tickangle=-45)
fig.write_image(figDir+"/"+nb_fname+"Not_norm.ctlOnly"+".pdf")
fig.show()
## Normalized by region
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregion", color=variable1,
height=800,width=1000, template="plotly_white",
category_orders={variable2:leidenOrder},
color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region",
)
fig.update_traces(marker_line_color='black',
marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
)
fig.update_xaxes(tickangle=-45)
fig.write_image(figDir+"/"+nb_fname+"Region_Norm.ctlOnly"+".pdf")
fig.show()
## Normalized by region and leiden
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
compositions["number_of_cells_normByregionANDLeiden"] = np.array(compositions["number_of_cells_normByregion"]) / np.array(compositions.groupby(variable2)["number_of_cells_normByregion"].sum()[compositions[variable2]])
fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregionANDLeiden", color=variable1,
height=800,width=1000, template="plotly_white",
category_orders={variable2:leidenOrder},
color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region and total of each leiden",
)
fig.update_traces(marker_line_color='black',
marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
)
fig.update_xaxes(tickangle=-45)
fig.write_image(figDir+"/"+nb_fname+"Leiden_Region_Norm.ctlOnly"+".pdf")
fig.show()
## Not normalized
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] = adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
adata = adata[adata.obs["type"].isin(["polaroid"])]
variable1="region"
variable2="leidenAnno"
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
fig = px.bar(compositions, x=variable2, y="number_of_cells", color=variable1,
category_orders={variable2:leidenOrder},
height=800,width=1000, template="plotly_white",
color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Not normalized",
)
fig.update_traces(marker_line_color='black',
marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
)
fig.write_image(figDir+"/"+nb_fname+"Not_norm.polaroidOnly"+".pdf")
fig.show()
## Normalized by region
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregion", color=variable1,
height=800,width=1000, template="plotly_white",
category_orders={variable2:leidenOrder},
color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region",
)
fig.update_traces(marker_line_color='black',
marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
)
fig.write_image(figDir+"/"+nb_fname+"Region_Norm.polaroidOnly"+".pdf")
fig.show()
## Normalized by region and leiden
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
compositions["number_of_cells_normByregionANDLeiden"] = np.array(compositions["number_of_cells_normByregion"]) / np.array(compositions.groupby(variable2)["number_of_cells_normByregion"].sum()[compositions[variable2]])
fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregionANDLeiden", color=variable1,
height=800,width=1000, template="plotly_white",
category_orders={variable2:leidenOrder},
color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region and total of each leiden",
)
fig.update_traces(marker_line_color='black',
marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
)
fig.write_image(figDir+"/"+nb_fname+"Leiden_Region_Norm.polaroidOnly"+".pdf")
fig.show()
## Not normalized
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] = adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
#adata = adata[adata.obs["type"].isin(["polaroid"])]
variable1="region"
variable2="leidenAnno"
regionOrder=["proximal","medial","distal","piece1","piece2","piece3"]
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
fig = px.bar(compositions, x=variable2, y="number_of_cells", color=variable1,
category_orders={variable2:leidenOrder,variable1:regionOrder},
height=800,width=1000, template="plotly_white",
color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Not normalized",
)
fig.update_traces(marker_line_color='black',
marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
)
fig.update_xaxes(tickangle=-45)
#fig.write_image(figDir+"/"+outBaseName+"Not_norm.polaroidOnly"+".pdf")
fig.show()
## Normalized by region
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregion", color=variable1,
height=800,width=1000, template="plotly_white",
category_orders={variable2:leidenOrder,variable1:regionOrder},
color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region",
)
fig.update_traces(marker_line_color='black',
marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
)
fig.update_xaxes(tickangle=-45)
#fig.write_image(figDir+"/"+outBaseName+"Region_Norm.polaroidOnly"+".pdf")
fig.show()
## Normalized by region and leiden
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
compositions["number_of_cells_normByregionANDLeiden"] = np.array(compositions["number_of_cells_normByregion"]) / np.array(compositions.groupby(variable2)["number_of_cells_normByregion"].sum()[compositions[variable2]])
fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregionANDLeiden", color=variable1,
height=800,width=1000, template="plotly_white",
category_orders={variable2:leidenOrder,variable1:regionOrder},
color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region and total of each leiden",
)
fig.update_traces(marker_line_color='black',
marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
)
fig.update_xaxes(tickangle=-45)
fig.write_image(figDir+"/"+nb_fname+"Leiden_Region_Norm.AllByRegion."+".pdf")
fig.show()
## Not normalized
variable1="type"
variable2="leidenAnno"
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] = adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
compositions["number_of_cells_normByregionANDleidenAnno"] = np.array(compositions["number_of_cells_normByregion"]) / np.array(compositions.groupby(variable2)["number_of_cells_normByregion"].sum()[compositions[variable2]])
#leidenOrder = compositions[compositions[variable1] == "polaroid"].sort_values("number_of_cells_normByregionANDleidenAnno", ascending=False)[variable2].tolist()
leidenOrder
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
fig = px.bar(compositions, x=variable2, y="number_of_cells", color=variable1,
category_orders={variable2:leidenOrder},
height=800,width=1000, template="plotly_white",
color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Not normalized",
)
fig.update_traces(marker_line_color='black',
marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
)
fig.update_xaxes(tickangle=-45)
fig.write_image(figDir+"/"+nb_fname+"not_Norm.both"+".pdf")
fig.show()
## Normalized by region
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregion", color=variable1,
height=800,width=1000, template="plotly_white",
category_orders={variable2:leidenOrder},
color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region",
)
fig.update_traces(marker_line_color='black',
marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
)
fig.update_xaxes(tickangle=-45)
fig.write_image(figDir+"/"+nb_fname+"Region_Norm.both"+".pdf")
fig.show()
## Normalized by region and leiden
compositions = pd.DataFrame(adata.obs.groupby([variable1,variable2]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normByregion"] = np.array(compositions["number_of_cells"]) / np.array(compositions.groupby(variable1)["number_of_cells"].sum()[compositions[variable1]])
compositions["number_of_cells_normByregionANDLeiden"] = np.array(compositions["number_of_cells_normByregion"]) / np.array(compositions.groupby(variable2)["number_of_cells_normByregion"].sum()[compositions[variable2]])
fig = px.bar(compositions, x=variable2, y="number_of_cells_normByregionANDLeiden", color=variable1,
height=800,width=1000, template="plotly_white",
category_orders={variable2:leidenOrder},
color_discrete_map = dict(zip(adata.obs[variable1].cat.categories,adata.uns[variable1+"_colors"])),title="## Normalized by total of each region and total of each leiden",
)
fig.update_traces(marker_line_color='black',
marker_line_width=1, opacity=1)
fig.update_layout( yaxis = dict( tickfont = dict(size=30), title_font=dict(size=30)),legend = dict(font = dict(size = 30)),
xaxis = dict( tickfont = dict(size=30), title_font=dict(size=30))
)
fig.update_xaxes(tickangle=-45)
fig.write_image(figDir+"/"+nb_fname+"Leiden_Region_Norm.both"+".pdf", scale=3)
fig.show()
adata = sc.read_h5ad(adataPath)
adata.obs["leidenAnno"] = adata.obs["leiden0.3"].replace(leiden3Mapping).astype("category").cat.reorder_categories(list(leiden3Mapping.values()))
adata.uns["leidenAnno_colors"] = adata.uns["leiden0.3_colors"].copy()
for org in adata.obs.organoid.unique() :
print("Plot for organoid {}".format(org))
print("Plot for organoid {}".format(org))
sc.pl.umap(adata, color=["organoid"],groups= org, size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)
adataSS = adata[adata.obs.organoid==org]
sc.pl.umap(adataSS, color=["leidenAnno"], size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)
sc.pl.umap(adataSS, color=["organoid"], size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)
Plot for organoid control3 Plot for organoid control3
Plot for organoid control1 Plot for organoid control1
Plot for organoid control2 Plot for organoid control2